#install.packages(c("GGally", "naniar", "plotly"))
library(tidyverse)
library(GGally)
library(naniar)
library(plotly)
library(corrplot)
# Load the 2012 heptathlon results and keep the athlete name plus the seven
# event columns.
hep <- read_csv("Hep2012.csv")
hep <- hep[, c(
  "Athlete", "100m Hurdles", "High Jump", "Shot Put", "200m",
  "Long Jump", "Javelin", "800m"
)]

# Add each athlete's overall total (row sum of the seven event columns) and
# sort so the highest total comes first.
hep <- hep %>%
  mutate(total = rowSums(across(`100m Hurdles`:`800m`))) %>%
  arrange(desc(total))

# Correlation between the seven events (columns 2-8), computed from every pair
# of rows that is complete for the two events being compared.
# cor() is applied exactly once: feeding the correlation matrix back into cor()
# would correlate the columns of that matrix instead of the event columns.
# The result keeps the name `cor`; function calls to cor() still resolve to the
# stats function because R skips non-function objects when looking up a call.
cor <- cor(hep[, 2:8], use = "pairwise.complete.obs")
corrplot(cor)
The high jump and long jump have a strong positive correlation, while the
javelin and the 200m have a strong negative correlation.
library(tidyverse)
# Scatterplot matrix of the seven event columns (columns 2-8 of `hep`).
# The athlete name is mapped to the `text` aesthetic so that it can be shown
# in the interactive tooltip below. The mapping is passed by name and the
# label argument is spelled `columnLabels`, so both are matched to the
# intended ggpairs() parameters instead of falling into `...`.
plot <- ggpairs(
  hep,
  mapping = aes(text = Athlete),
  columns = 2:8,
  columnLabels = c(
    "100m Hurdles", "High Jump", "Shot Put", "200m",
    "Long Jump", "Javelin", "800m"
  )
)

# Convert to an interactive plotly widget; the tooltip shows the athlete name
# and the x/y values, and hovering highlights the hovered point.
ggplotly(plot, tooltip = c("text", "x", "y")) %>%
  highlight(on = "plotly_hover", off = "plotly_deselect")
SKUJYTE Austra is an outlier in the shot put and 200m panels, and OESER
Jennifer is an outlier in the 200m and 800m panels.
Jennifer did not run her 800m, which is why she appears as an outlier. As
for Austra, she also has a zero for her 200m, which results in the data
being skewed.
library(MASS)
# Parallel coordinates plot of the seven event columns (columns 2-8 of `hep`).
# MASS::parcoord() takes no `scale` argument; it always rescales each column
# to its own minimum-maximum range. Any extra argument is forwarded to the
# underlying base plotting call, so the former `scale = "globalMinMax"` was
# not a scaling option and has been dropped.
parcoord(hep[, 2:8])
Yes, you can see many athletes with a zero score in multiple events,
potentially due to injury or other incidents.
# Label each row with the athlete's own name if she is one of the three
# medallists, and with "Other" otherwise.
# Membership is tested with %in%, so the result does not depend on the row
# order or on the number of rows. An element-wise `!=` against a length-3
# vector would recycle the names down the column and only give the right
# answer when the three names sit in rows 1-3 in exactly this order.
medallists <- c("ENNIS Jessica", "SCHWARZKOPF Lilli", "CHERNOVA Tatyana")
hep2 <- hep %>%
  mutate(Winners = if_else(Athlete %in% medallists, Athlete, "Other"))

# NOTE(review): `cols` is not used by the plot below, and its "Others" key does
# not match the "Other" label used in `Winners`. It is kept unchanged in case
# code outside this section reads it; confirm and remove if not.
cols <- c( "ENNIS Jessica" = "red" , Others = "grey")

# Parallel coordinates plot of the seven event columns on a common scale
# (no per-column rescaling), with the three medallists coloured and everyone
# else in grey.
ggparcoord(
  hep2,
  columns = 2:8,
  groupColumn = "Winners",
  scale = "globalminmax"
) +
  scale_color_manual(values = c(
    "ENNIS Jessica" = "red",
    "SCHWARZKOPF Lilli" = "blue",
    "CHERNOVA Tatyana" = "orange",
    "Other" = "grey"
  ))
# Replace each event column with the athlete's rank on the cumulative total up
# to and including that event. rank() is ascending, so the athlete with the
# highest running total gets the largest rank value. Missing values are
# treated as zero in the running total (na.rm = TRUE).
hepc <- hep
for (i in 2:8) {
  # drop = FALSE keeps a one-column selection (i == 2) as a data frame, so
  # rowSums() works whether `hep` is a tibble or a plain data.frame.
  hepc[[i]] <- rank(rowSums(hep[, 2:i, drop = FALSE], na.rm = TRUE))
}

# Group lines as medallist / "Other" so that every line has a matching entry
# in the manual palette. Grouping by "Athlete" leaves every non-medallist
# without a palette entry, and the "Other" colour is never used.
podium <- c("ENNIS Jessica", "SCHWARZKOPF Lilli", "CHERNOVA Tatyana")
hepc$Winners <- ifelse(hepc$Athlete %in% podium, hepc$Athlete, "Other")

# Parallel coordinates plot of the cumulative ranks, drawn on their original
# scale (no per-column rescaling).
ggparcoord(
  hepc,
  columns = 2:8,
  groupColumn = "Winners",
  scale = "globalminmax"
) +
  scale_color_manual(values = c(
    "ENNIS Jessica" = "red",
    "SCHWARZKOPF Lilli" = "blue",
    "CHERNOVA Tatyana" = "orange",
    "Other" = "grey"
  ))
No. You can clearly see from this graph that none of them led after every
event. Jessica is 2nd after only one event and 1st after all the rest. The
2nd- and 3rd-place finishers vary in position from event to event and did
not actually lead after any event. So, in conclusion, Jessica was only one
event short of leading after every single event in the heptathlon.